
************************************************************************************

* This program adds information about diabetes and hypertension to the Medicare claims data

* Uses data produced in 1. Collect Data.do
* 			bcarrier_claims_`yr'_reshaped.dta
*			medpar_all_file_reshaped_`yr'.dta
*			outpatient_base_claims_reshaped_`yr'.dta

* Combines data that we put together in spreadsheets:
*			hypertension_CDS.dta
*			diabetes_CDS.dta
*			Chronic.xlsx (which is put together from information from NHIS)

************************************************************************************



set more off
capture log close
log using "N:\MedicareClaims-P045601-BE\Work\hosp_retro\health_out\Data-Out\2.mapDiseasev2.log" , append
clear all
global dataIn "N:\MedicareClaims-P045601-BE\Work\hosp_retro\health_out\Data-In"
global dataOut "N:\MedicareClaims-P045601-BE\Work\hosp_retro\health_out\Data-Out\"


adopath +  N:\SIL-Common\estout
adopath +  N:\SIL-Common\outreg2
adopath +  N:\SIL-Common\reghdfe-master\package


**********************
*CLEANS UP THE HEALTH CONDITION DATA
**********************

*Hypertension
foreach j in 5 4 3 { 
	tempfile hyp_`j'digit
	use "$dataIn/hypertension_CDS.dta"
	 tostring ICD, replace
	 gen lenICD=length(ICD)
	 drop if lenICD==1
	 drop lenICD
	 gen hypertension=1
	 gen icd=substr(ICD,1,`j')
 keep icd malignant benign unspecified primary_hyp secondary_hyp hypertension compl_hyp

 foreach var of varlist * {
	local k="`var'"
	rename `var' `k'_`j'
	}
	
 duplicates drop
 duplicates tag icd_`j', gen(tag)
	tab tag
	drop if tag>0
	drop tag
	sort icd_`j'
	save `hyp_`j'digit', replace
 clear
}

*Diabetes
foreach j in 5 4 3 { 
	tempfile diab_`j'digit
	use "$dataIn/diabetes_CDS.dta"
	 tostring ICD, replace
	 gen lenICD=length(ICD)
	 drop if lenICD==1
	 drop lenICD
	 gen diabetes=1
	 gen icd=substr(ICD,1,`j')

foreach var of varlist * {
	local k="`var'"
	 rename `var' `k'_`j'
}

keep icd_`j' complication typeii
	rename complication diab_compl
	gen diabetes=1
	duplicates drop
	duplicates tag icd_`j', gen(tag)

	tab tag
	drop if tag>0
	drop tag
	sort icd_`j'

	save `diab_`j'digit', replace
	clear
}

*Chronic
foreach j in 4 3 { 
	tempfile chronic_`j'digit
	import excel "$dataIn/Chronic.xlsx", sheet("NHIS") firstrow
	 tostring ICD, replace
	 gen lenICD=length(ICD)
	 keep if lenICD==`j'
	 drop lenICD
	 gen icd=substr(ICD,1,`j')

 foreach var of varlist * {
	local k="`var'"
	rename `var' `k'_`j'
}

	keep icd_`j' chronic
	duplicates drop
	duplicates tag icd_`j', gen(tag)
	 tab tag
	 drop if tag>0
	 drop tag
	sort icd_`j'
 save `chronic_`j'digit', replace
 clear
}

**********************
*MERGES THE CLAIMS DATA WITH THE HEALTH CONDITION INFORMATION
* Merges on 5-digit icd9 code then 4-digit icd9 code and then the 3-digit icd9 code
* Most important for diabetes. Less important for other characteristics (don't exploit hypertension designations; chronic designations accurate at 3-digit level)
**********************
*loops over the inpatient, outpatient, and ambulatory files
foreach k in "bcarrier_claims" "medpar_all_file" "outpatient_base_claims" { 
 foreach yr of numlist 2005(1)2012 {
   use "$dataIn/`k'_reshaped_`yr'.dta", replace
*Hypertension
foreach j in 5 4 3 {   	
   	gen icd_`j'=substr(ICD,1,`j')
   	sort icd_`j'
   	merge m:1 icd_`j' using `hyp_`j'digit'
   		drop if _merge==2
   		drop _merge
 	}
*Diabetes
foreach j in 5 4 3 {   	
   	sort icd_`j'
   	merge m:1 icd_`j' using `diab_`j'digit'
   		drop if _merge==2
   		drop _merge
 	}
*Chronic
foreach j in 4 3 {   	
   	sort icd_`j'
   	merge m:1 icd_`j' using `chronic_`j'digit'
   		drop if _merge==2
   		drop _merge
 	}


capture destring icd_3,gen(icd3_num) force

*ICD9 chapters
  *Infectious
	gen byte icdbin = 1 if icd3_num > 0 & icd3_num < 140
	*Neoplasms
	replace icdbin = 2 if icd3_num > 139 & icd3_num <239
	*Endocrine
	replace icdbin = 3 if icd3_num > 240 & icd3_num < 280
	*Blood
	replace icdbin = 4 if icd3_num > 279 & icd3_num < 290
	*Mental Health
	replace icdbin = 5 if icd3_num > 289 & icd3_num < 320
	*Nervous System 1
	replace icdbin = 6 if icd3_num > 319 & icd3_num < 360
	*Nervous System 2
	replace icdbin = 7 if icd3_num > 359 & icd3_num < 390
	*Cardiology
	replace icdbin = 8 if icd3_num > 389 & icd3_num < 460
	*Respiratory
	replace icdbin = 9 if icd3_num > 459 & icd3_num < 520
	*Gastro
	replace icdbin = 10 if icd3_num > 519 & icd3_num < 580
	*Genito-urinary
	replace icdbin = 11 if icd3_num > 579 & icd3_num < 630
	*Pregnancy
	replace icdbin = 12 if icd3_num > 629 & icd3_num < 680
	*Skin
	replace icdbin = 13 if icd3_num > 679 & icd3_num < 710
	*Musculo-Skeletal
	replace icdbin = 14 if icd3_num > 709 & icd3_num < 740
	*Congenital
	replace icdbin = 15 if icd3_num > 739 & icd3_num < 760
	*Perinatal
	replace icdbin = 16 if icd3_num > 759 & icd3_num < 780
	*signs/symptoms
	replace icdbin = 17 if icd3_num > 779 & icd3_num < 800
	*injury/poison
	replace icdbin = 18 if icd3_num > 799 & icd3_num < 1000

foreach var of varlist malignant* benign* unspecified* primary_hyp* secondary_hyp* hypertension* diab*  compl_hyp* typeii* chronic*  {
 replace `var'=0 if `var'==.
}
 
levelsof icdbin, local(icdloc) 
foreach i of local icdloc { 
 gen byte icd`i'_acute=(chronic_4==0 & chronic_3==0 & icdbin==`i')
 gen byte icd`i'_chronic=((chronic_4==1 | chronic_3==1) &  icdbin==`i')
 }
 
gen date=mdy(month(CLM_THRU_DT),1,year(CLM_THRU_DT)) 
format date %d 

save "$dataOut/bene_icdpre_`k'_`yr'.dta", replace

 
	}
}	
